Escape Room Survey¶

Dependencies¶

import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Visualisation
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(color_codes=True)
from scipy.stats import chi2_contingency, ranksums
from pandas import Series, DataFrame
from scipy import stats

# Select the inline backend below so that plots are rendered inside the notebook.
# Other available backends: auto, gtk, gtk2, osx, qt, qt5, wx, tk
#%matplotlib auto
#%matplotlib qt
%matplotlib inline

Import data¶

# Local alternative (disabled): read the survey answers from a CSV file.
# path = "datos.csv"
# df = pd.read_csv(path)
from azureml import Workspace

# Workspace() is created without arguments.
# NOTE(review): presumably it picks up the workspace credentials from the
# hosting notebook environment -- confirm before running this elsewhere.
ws = Workspace()
# Fetch the 'ERDatos.csv' dataset from the workspace and convert it to a frame.
ds = ws.datasets['ERDatos.csv']
df = ds.to_dataframe()
# Re-wrap the result in a pandas DataFrame.
df = pd.DataFrame(data=df)

# Disabled clean-up: would strip the " (Muy de acuerdo)" and
# " (Muy en desacuerdo)" suffixes from the answers.
# df = df.replace({' \(Muy de acuerdo\)': ''}, regex=True)
# df = df.replace({' \(Muy en desacuerdo\)': ''}, regex=True)
# Show the first rows as a quick sanity check of the loaded data.
df.head()

# Display the dtype of every column of the loaded frame.
df.dtypes

Timestamp                object
Age                       int64
Sex                      object
GeneralOpinion            int64
LikeGames                 int64
EasyIWEB                 object
LearningEffectiveness    object
Engagement                int64
Difficulty               object
Organisation             object
PreferOverLab            object
LearnMoreThanLab         object
OtherSubjects            object
Recommend                object
Opinion                  object
dtype: object

# Columns that are converted to float for the statistics below.
numList = [
    "Age",
    "LikeGames",
    "EasyIWEB",
    "LearningEffectiveness",
    "Engagement",
    "Difficulty",
    "Organisation",
    "PreferOverLab",
    "LearnMoreThanLab",
]

# "NS/NC" answers are treated as missing and imputed with the mean of the
# valid answers of the same column.
#
# The marker is replaced with NaN (not None): `replace(x, None)` does not
# yield a missing value -- depending on the pandas version it either
# pad-fills from the previous row or inserts None, which float() rejects.
# The loop variable is no longer called `str`, so the builtin stays usable.
for column in numList:
    # to_numeric raises on any other non-numeric answer, like float() did.
    answers = pd.to_numeric(df[column].replace("NS/NC", np.nan)).astype(float)
    # Series.mean() skips NaN, so only valid answers contribute to the mean.
    df[column] = answers.fillna(answers.mean())

# Summary statistics for all columns (include="all"), transposed so that
# each column of the frame becomes one row of the summary.
df.describe(include="all").T

def checkHypotheses(name, p_val, alpha=0.05):
    """Print a short report on the outcome of a two-sample significance test.

    Parameters
    ----------
    name : str
        Label printed as the first line of the report.
    p_val : float
        p-value produced by the statistical test.
    alpha : float, optional
        Significance level; the null hypothesis is rejected when
        ``p_val < alpha``.  Defaults to 0.05, the value that used to be
        hard-coded.

    Returns
    -------
    None
        The report is written to standard output.
    """
    import math

    print(name)
    print("   The p value is", p_val)
    if math.isnan(p_val):
        # NaN compares False against any threshold, so without this guard an
        # undefined p-value would be reported as "failed to reject".
        print("   The p value is undefined: " +
              "the test could not be evaluated")
    elif p_val < alpha:
        # Existing message text is kept verbatim (including its spelling) so
        # reports stay comparable with previously recorded output.
        print("   The null hyphotheses is rejected: " +
              "The two samples are statistically different")
    else:
        print("   Failed to reject the null hypotheses: " +
              "The two samples are alike")
    print('**************************************' +
          '**************************************')

Gender bias¶

# Count the answers for every (GeneralOpinion, Sex) pair, then pivot Sex into
# columns; combinations that never occur are filled with 0.
opinion_counts = df.groupby(['GeneralOpinion', 'Sex'])['GeneralOpinion'].count()
st = opinion_counts.unstack('Sex').fillna(0)
st.plot(kind='bar', stacked=True)

<matplotlib.axes._subplots.AxesSubplot at 0x7ff57ffaeeb8>

# Count the answers for every (LikeGames, Sex) pair, then pivot Sex into
# columns; combinations that never occur are filled with 0.
games_counts = df.groupby(['LikeGames', 'Sex'])['LikeGames'].count()
st = games_counts.unstack('Sex').fillna(0)
st.plot(kind='bar', stacked=True)

<matplotlib.axes._subplots.AxesSubplot at 0x7ff57fe5ccc0>

# Count the answers for every (Engagement, Sex) pair, then pivot Sex into
# columns; combinations that never occur are filled with 0.
engagement_counts = df.groupby(['Engagement', 'Sex'])['Engagement'].count()
st = engagement_counts.unstack('Sex').fillna(0)
st.plot(kind='bar', stacked=True)

<matplotlib.axes._subplots.AxesSubplot at 0x7ff5803395f8>

# Filter the respondents once per value of the Sex column
# ("Mujer" rows feed the women* series, "Hombre" rows the men* series),
# then pick the individual answer columns from each subset.
women = df[df['Sex'] == "Mujer"]
men = df[df['Sex'] == "Hombre"]

womenLikeGames, menLikeGames = women['LikeGames'], men['LikeGames']
womenLikeER, menLikeER = women['Engagement'], men['Engagement']
womenEasy, menEasy = women['EasyIWEB'], men['EasyIWEB']
womenGeneral, menGeneral = women['GeneralOpinion'], men['GeneralOpinion']
womenKnowledge, menKnowledge = (women['LearningEffectiveness'],
                                men['LearningEffectiveness'])
womenLevel, menLevel = women['Difficulty'], men['Difficulty']
womenOrganised, menOrganised = women['Organisation'], men['Organisation']
womenPrefer, menPrefer = women['PreferOverLab'], men['PreferOverLab']
womenLearn, menLearn = women['LearnMoreThanLab'], men['LearnMoreThanLab']
womenOther, menOther = women['OtherSubjects'], men['OtherSubjects']
womenRecommend, menRecommend = women['Recommend'], men['Recommend']

# Summary statistics of the LikeGames answers given by the "Mujer" respondents.
womenLikeGames.describe(include="all").T

count    21.000000
mean      3.523810
std       0.928388
min       2.000000
25%       3.000000
50%       4.000000
75%       4.000000
max       5.000000
Name: LikeGames, dtype: float64

# Summary statistics of the LikeGames answers given by the "Hombre" respondents.
menLikeGames.describe(include="all").T

count    43.000000
mean      4.534884
std       0.667220
min       3.000000
25%       4.000000
50%       5.000000
75%       5.000000
max       5.000000
Name: LikeGames, dtype: float64

# Rank-sum comparison of the men's and women's answers, one report per
# question.  Each entry is (report label, men's sample, women's sample);
# the order of the list is the order of the printed reports.
comparisons = [
    ("Engagement", menLikeER, womenLikeER),
    ("Games", menLikeGames, womenLikeGames),
    ("Learning Effectiveness", menKnowledge, womenKnowledge),
    ("Easy IWEB", menEasy, womenEasy),
    ("General opinion", menGeneral, womenGeneral),
    ("Difficulty ER", menLevel, womenLevel),
    ("Organisation", menOrganised, womenOrganised),
    ("Prefer over lab", menPrefer, womenPrefer),
    ("Learning effectiveness over lab", menLearn, womenLearn),
    ("Recommend", menRecommend, womenRecommend),
]
for label, men_sample, women_sample in comparisons:
    z_stat, p_val = ranksums(men_sample, women_sample)
    checkHypotheses(label, p_val)

Engagement
   The p value is 0.348989636302
   Failed to reject the null hypotheses: The two samples are alike
****************************************************************************
Games
   The p value is 0.000119918416005
   The null hyphotheses is rejected: The two samples are statistically different
****************************************************************************
Learning Effectiveness
   The p value is 0.23531662741
   Failed to reject the null hypotheses: The two samples are alike
****************************************************************************
Easy IWEB
   The p value is 0.515316911072
   Failed to reject the null hypotheses: The two samples are alike
****************************************************************************
General opinion
   The p value is 0.965784868225
   Failed to reject the null hypotheses: The two samples are alike
****************************************************************************
Difficulty ER
   The p value is 0.224224489233
   Failed to reject the null hypotheses: The two samples are alike
****************************************************************************
Organisation
   The p value is 0.0374861296661
   The null hyphotheses is rejected: The two samples are statistically different
****************************************************************************
Prefer over lab
   The p value is 0.224224489233
   Failed to reject the null hypotheses: The two samples are alike
****************************************************************************
Learning effectiveness over lab
   The p value is 0.390942609374
   Failed to reject the null hypotheses: The two samples are alike
****************************************************************************
Recommend
   The p value is 0.529261722029
   Failed to reject the null hypotheses: The two samples are alike
****************************************************************************

More insights¶

# Pairwise correlation of the numeric columns only.  The frame also holds
# text columns (Timestamp, Sex, ...); selecting the numeric ones explicitly
# keeps this cell working on pandas versions where corr() no longer drops
# non-numeric columns silently.
df.select_dtypes(include=[np.number]).corr()

Learning Effectiveness vs. Engagement¶

# Count the answers for every (LearningEffectiveness, Engagement) pair, then
# pivot Engagement into columns; combinations that never occur become 0.
pair_counts = df.groupby(['LearningEffectiveness', 'Engagement'])['LearningEffectiveness'].count()
st = pair_counts.unstack('Engagement').fillna(0)
st.plot(kind='bar', stacked=True)

<matplotlib.axes._subplots.AxesSubplot at 0x7ff57fb94080>

# Correlation coefficient matrix of the LearningEffectiveness and Engagement scores.
np.corrcoef(df['LearningEffectiveness'], df['Engagement'])

array([[ 1.      ,  0.627534],
       [ 0.627534,  1.      ]])

# Dichotomise both scores (True when the answer is above THRESHOLD) and
# compare the two boolean indicators with a rank-sum test.
# NOTE(review): both indicators come from the same rows of df, while ranksums
# is a test for independent samples -- confirm this is the intended test.
THRESHOLD = 2
effective = df['LearningEffectiveness'].gt(THRESHOLD)
engaging = df['Engagement'].gt(THRESHOLD)
z_stat, p_val = ranksums(effective, engaging)
checkHypotheses(
    "Are learning effectiveness and engagement equally distributed?",
    p_val,
)

Are learning effectiveness and engagement equally distributed?
   The p value is 0.169911841731
   Failed to reject the null hypotheses: The two samples are alike
****************************************************************************

# Correlation coefficient matrix of the two boolean indicators currently held
# in `effective` and `engaging`.
np.corrcoef(effective, engaging)

array([[ 1.        ,  0.49136232],
       [ 0.49136232,  1.        ]])

# Same comparison with a stricter cut-off: an answer counts as positive only
# when it is above 3.
THRESHOLD = 3
effective = df['LearningEffectiveness'].gt(THRESHOLD)
engaging = df['Engagement'].gt(THRESHOLD)
z_stat, p_val = ranksums(effective, engaging)
checkHypotheses(
    "Are learning effectiveness and engagement equally distributed?",
    p_val,
)

Are learning effectiveness and engagement equally distributed?
   The p value is 0.00952879138632
   The null hyphotheses is rejected: The two samples are statistically different
****************************************************************************

# Correlation coefficient matrix of the two boolean indicators currently held
# in `effective` and `engaging`.
np.corrcoef(effective, engaging)

array([[ 1.        ,  0.40574111],
       [ 0.40574111,  1.        ]])

Learning Effectiveness vs. EasyIWEB¶

# Count the answers for every (LearningEffectiveness, EasyIWEB) pair, then
# pivot EasyIWEB into columns; combinations that never occur become 0.
pair_counts = df.groupby(['LearningEffectiveness', 'EasyIWEB'])['LearningEffectiveness'].count()
st = pair_counts.unstack('EasyIWEB').fillna(0)
st.plot(kind='bar', stacked=True)

<matplotlib.axes._subplots.AxesSubplot at 0x7ff57faa3b38>

# Correlation coefficient matrix of the LearningEffectiveness and EasyIWEB scores.
np.corrcoef(df['LearningEffectiveness'], df['EasyIWEB'])

array([[ 1.        ,  0.42894368],
       [ 0.42894368,  1.        ]])

# Dichotomise both scores (True when the answer is above THRESHOLD) and
# compare the two boolean indicators with a rank-sum test.
THRESHOLD = 2
effective = df['LearningEffectiveness'].gt(THRESHOLD)
easy = df['EasyIWEB'].gt(THRESHOLD)
z_stat, p_val = ranksums(effective, easy)
checkHypotheses(
    "Are learning effectiveness and opinion on IWEB level equally distributed?",
    p_val,
)

Are learning effectiveness and opinion on IWEB level equally distributed?
   The p value is 0.222469210665
   Failed to reject the null hypotheses: The two samples are alike
****************************************************************************

# Same comparison with a stricter cut-off: an answer counts as positive only
# when it is above 3.
THRESHOLD = 3
effective = df['LearningEffectiveness'].gt(THRESHOLD)
easy = df['EasyIWEB'].gt(THRESHOLD)
z_stat, p_val = ranksums(effective, easy)
checkHypotheses(
    "Are learning effectiveness and opinion on IWEB level equally distributed?",
    p_val,
)

Are learning effectiveness and opinion on IWEB level equally distributed?
   The p value is 0.00605156428839
   The null hyphotheses is rejected: The two samples are statistically different
****************************************************************************

	count	unique	top	freq	mean	std	min	25%	50%	75%	max
Timestamp	64	57	2018/12/17 1:21:28 p. m. CET	2	NaN	NaN	NaN	NaN	NaN	NaN	NaN
Age	64	NaN	NaN	NaN	22.0469	3.05728	20	21	21	22	45
Sex	64	2	Hombre	43	NaN	NaN	NaN	NaN	NaN	NaN	NaN
GeneralOpinion	64	NaN	NaN	NaN	4.28125	0.863157	1	4	4	5	5
LikeGames	64	NaN	NaN	NaN	4.20312	0.894067	2	4	4	5	5
EasyIWEB	64	NaN	NaN	NaN	2.79688	0.945829	1	2	3	3	5
LearningEffectiveness	64	NaN	NaN	NaN	3.35938	1.23914	1	2.75	3.5	4	5
Engagement	64	NaN	NaN	NaN	4.20312	1.1294	1	4	5	5	5
Difficulty	64	NaN	NaN	NaN	3.4375	0.888641	1	3	3.5	4	5
Organisation	64	NaN	NaN	NaN	3.90625	1.01916	1	3	4	5	5
PreferOverLab	64	NaN	NaN	NaN	4.42188	0.939515	1	4	5	5	5
LearnMoreThanLab	64	NaN	NaN	NaN	3.70312	1.28087	1	3	4	5	5
OtherSubjects	64	2	Si	60	NaN	NaN	NaN	NaN	NaN	NaN	NaN
Recommend	64	2	Si	56	NaN	NaN	NaN	NaN	NaN	NaN	NaN
Opinion	19	19	Creo que es una gran iniciativa para incentiva...	1	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	Age	GeneralOpinion	LikeGames	EasyIWEB	LearningEffectiveness	Engagement	Difficulty	Organisation	PreferOverLab	LearnMoreThanLab
Age	1.000000	-0.167480	0.106795	-0.095461	-0.113455	-0.228055	0.079969	-0.227809	-0.283300	0.060358
GeneralOpinion	-0.167480	1.000000	0.048207	0.246072	0.631187	0.836006	0.064669	0.607849	0.458140	0.464361
LikeGames	0.106795	0.048207	1.000000	0.368665	-0.009626	0.052808	0.026222	0.003811	0.047537	-0.071253
EasyIWEB	-0.095461	0.246072	0.368665	1.000000	0.428944	0.262128	-0.062557	0.259863	0.080102	0.145966
LearningEffectiveness	-0.113455	0.631187	-0.009626	0.428944	1.000000	0.627534	-0.116221	0.479581	0.372176	0.538324
Engagement	-0.228055	0.836006	0.052808	0.262128	0.627534	1.000000	0.068205	0.582202	0.621040	0.426386
Difficulty	0.079969	0.064669	0.026222	-0.062557	-0.116221	0.068205	1.000000	0.081059	-0.015447	0.032249
Organisation	-0.227809	0.607849	0.003811	0.259863	0.479581	0.582202	0.081059	1.000000	0.356929	0.306645
PreferOverLab	-0.283300	0.458140	0.047537	0.080102	0.372176	0.621040	-0.015447	0.356929	1.000000	0.488243
LearnMoreThanLab	0.060358	0.464361	-0.071253	0.145966	0.538324	0.426386	0.032249	0.306645	0.488243	1.000000

	Timestamp	Age	Sex	GeneralOpinion	LikeGames	EasyIWEB	LearningEffectiveness	Engagement	Difficulty	Organisation	PreferOverLab	LearnMoreThanLab	OtherSubjects	Recommend	Opinion
0	2018/12/17 1:16:49 p. m. CET	22	Mujer	4	5	2	2	4	5	5	5	4	Si	No	NaN
1	2018/12/17 1:21:04 p. m. CET	45	Hombre	3	5	NS/NC	NS/NC	2	4	2	2	NS/NC	Si	Si	NaN
2	2018/12/17 1:21:06 p. m. CET	22	Hombre	4	4	2	3	4	2	2	4	5	Si	Si	Mejorar la interfaz de usuario, la pantalla de...
3	2018/12/17 1:21:21 p. m. CET	22	Hombre	4	4	3	4	3	4	5	4	4	Si	Si	Enhorabuena por el trabajo, nos lo hemos pasad...
4	2018/12/17 1:21:21 p. m. CET	20	Mujer	5	3	3	4	5	3	5	5	4	Si	Si	NaN